* This script contains the code to replicate Table B of the on-line appendix in Ductor, L., Fafchamps, M., Goyal S. and M. van der Leij. Social Networks and Research Output. The Review of Economics and Statistics.
/*Using different functional forms: log(x+1), Poisson, Negative Binomial, Zero-Inflated Negative Binomial, and a model in levels*/


// Close any log left open by an earlier (possibly aborted) run. Without this,
// -log using- below stops with "log file already open" and the script cannot
// be re-run in the same session. -capture- makes it a no-op when no log is open.
capture log close

// Record all output of this do-file in TableB.log, overwriting a previous copy.
log using TableB.log, replace

// Load the data set, discarding whatever is currently in memory.
use prodndnp_fullsample.dta, clear

// Order observations by auth, then year.
sort auth year

/*CHOOSING THE FUNCTIONAL FORM OF THE FUTURE PRODUCTIVITY VARIABLE*/

/*Unrestricted Baseline model*/

// MODEL 1. Baseline model with the log(x+1) dependent variable:
// in-sample fit on the training group and out-of-sample RMSE on group 2,
// using the normal-theory retransformation back to levels.

// Estimate on the training group (group==1) and report information criteria.
regress lprodf3 lprodf3l-lprodf3l13 t2-t27 y2-y27 nopapers if group==1
estat ic

// Flag the estimation sample; the later models are estimated on e2==1.
generate e2 = 1 if e(sample)

// In-sample R-squared.
scalar r2i1outb = e(r2)

// Out-of-sample step: linear prediction for group 2 where lprodf3 is observed.
predict q if group==2 & lprodf3<., xb

// Back to levels: undo the log(x+1) transform and correct the anti-log bias
// assuming normal, homoskedastic errors (factor exp(0.5 times rmse squared)).
generate qhat = exp(q) * exp(0.5 * e(rmse)^2) - 1

// Squared prediction error in levels.
generate sqerr = (prodf3 - qhat)^2 if qhat<.

// pe2 holds the sum of squared errors (constant across the predicted
// observations); summarizing it gives the RSS as r(max) and the number of
// predicted observations as r(N).
quietly {
    egen pe2 = total(sqerr) if qhat<.
    summarize pe2
}
scalar rss2 = r(max)
scalar n2 = r(N)

// Out-of-sample root mean squared error.
scalar rmseo2outb = (rss2/n2)^0.5
display rmseo2outb


// MODEL 1b. Baseline model with the log(x+1) dependent variable, retransformed
// to levels with a smearing factor (the mean of exp(residual)) instead of the
// normal-theory factor used in Model 1.

// Re-estimate on the Model 1 estimation sample (e2==1) and report information criteria.
regress lprodf3 lprodf3l-lprodf3l13 t2-t27 y2-y27 nopapers if e2==1
estat ic

// In-sample R-squared.
scalar r2i1outb = e(r2)

// Out-of-sample step: linear prediction for group 2 where lprodf3 is observed.
predict qb if group==2 & lprodf3<., xb

// Smearing factor. It is computed from the residuals of the ESTIMATION sample:
// taking residuals of group 2 (as the previous version did) would feed the
// hold-out outcomes into the hold-out prediction, so the reported RMSE would
// no longer be out of sample.
// NOTE(review): this changes rmseo2boutb relative to the previous version.
predict ub if e(sample), resid
generate expub = exp(ub)
quietly summarize expub

// Back to levels: exp(prediction) times the smearing factor, minus 1 to undo
// the +1 inside the log. No normality assumption is used here.
generate qhatb = exp(qb)*r(mean) - 1

// Squared prediction error in levels.
generate sqerrb = (prodf3 - qhatb)^2 if qhatb<.

// pe2b holds the sum of squared errors; summarizing it gives the RSS as
// r(max) and the number of predicted observations as r(N).
quietly egen pe2b = sum(sqerrb) if qhatb<.
quietly summarize pe2b
scalar rss2b = r(max)
scalar n2b = r(N)

// Out-of-sample root mean squared error.
scalar rmseo2boutb = (rss2b/n2b)^0.5
display rmseo2boutb

// MODEL 2. Baseline model estimated by Poisson regression on prodf3 in levels:
// in-sample fit on the Model 1 estimation sample and out-of-sample RMSE on group 2.

// Estimate on the estimation sample (e2==1); report information criteria and
// the goodness-of-fit test.
poisson prodf3 lprodf3l-lprodf3l13 t2-t27 y2-y27 nopapers if e2==1
estat ic
estat gof

// -poisson- does not store e(r2); the fit measure it stores is the pseudo
// R-squared e(r2_p). Reading e(r2) silently produced a missing scalar.
scalar r2i1m2 = e(r2_p)

// Out-of-sample step. The prediction must be on the scale of prodf3, so use
// option n (predicted number of events, exp(xb)). Option xb returns the
// linear index, i.e. the LOG of the expected value, which cannot be
// differenced against prodf3 in levels.
predict um2 if group==2 & prodf3<., n

// Squared prediction error in levels.
generate sqerrm2 = (prodf3 - um2)^2 if um2<.

// pem2 holds the sum of squared errors; summarizing it gives the RSS as
// r(max) and the number of predicted observations as r(N).
quietly egen pem2 = sum(sqerrm2) if um2<.
quietly summarize pem2
scalar rssm2 = r(max)
scalar n2 = r(N)

// Out-of-sample root mean squared error.
scalar rmseo2m2 = (rssm2/n2)^0.5
display rmseo2m2

// MODEL 3. Baseline model estimated by negative binomial regression on prodf3
// in levels: in-sample fit on the Model 1 estimation sample and out-of-sample
// RMSE on group 2.

// Estimate on the estimation sample (e2==1) and report information criteria.
nbreg prodf3 lprodf3l-lprodf3l13 t2-t27 y2-y27 nopapers if e2==1
estat ic

// -nbreg- does not store e(r2); use the pseudo R-squared e(r2_p). The scalar
// gets its own name (r2i1m3) so it no longer overwrites the Poisson value
// held in r2i1m2.
scalar r2i1m3 = e(r2_p)

// Out-of-sample step. Use option n (predicted number of events) so that the
// prediction is on the scale of prodf3; option xb would return the log-scale
// linear index. The sample condition is written on prodf3, the variable that
// enters the squared error, as in the other level-scale models.
predict um3 if group==2 & prodf3<., n

// Squared prediction error in levels.
generate sqerrm3 = (prodf3 - um3)^2 if um3<.

// pem3 holds the sum of squared errors; summarizing it gives the RSS as
// r(max) and the number of predicted observations as r(N).
quietly egen pem3 = sum(sqerrm3) if um3<.
quietly summarize pem3
scalar rssm3 = r(max)
scalar n3 = r(N)

// Out-of-sample root mean squared error.
scalar rmseo2m3 = (rssm3/n3)^0.5
display rmseo2m3


// MODEL 4. Baseline model estimated by zero-inflated negative binomial
// regression on prodf3 in levels (inflation equation: nopapers): in-sample fit
// on the Model 1 estimation sample and out-of-sample RMSE on group 2.

// From Stata 15 on, -zinb- rejects option vuong and stops with an error; the
// test is then only available through option forcevuong. Pick the spelling
// the running Stata accepts so the script works on both old and new releases.
local vuongopt = cond(c(stata_version) >= 15, "forcevuong", "vuong")

// Estimate on the estimation sample (e2==1) and report information criteria.
zinb prodf3 lprodf3l-lprodf3l13 t2-t27 y2-y27 nopapers if e2==1, inflate(nopapers) `vuongopt'
estat ic

// -zinb- stores neither e(r2) nor e(r2_p), so compute McFadden's pseudo
// R-squared from the fitted and constant-only log likelihoods. The scalar gets
// its own name (r2i1m4) so it does not overwrite the value of another model.
scalar r2i1m4 = 1 - e(ll)/e(ll_0)

// Out-of-sample step. Use option n (predicted number of events, which
// accounts for the zero-inflation part) so that the prediction is on the scale
// of prodf3; option xb would return the log-scale linear index of the count
// equation only.
predict um4 if group==2 & prodf3<., n

// Squared prediction error in levels.
generate sqerrm4 = (prodf3 - um4)^2 if um4<.

// pem4 holds the sum of squared errors; summarizing it gives the RSS as
// r(max) and the number of predicted observations as r(N).
quietly egen pem4 = sum(sqerrm4) if um4<.
quietly summarize pem4
scalar rssm4 = r(max)
scalar n4 = r(N)

// Out-of-sample root mean squared error.
scalar rmseo2m4 = (rssm4/n4)^0.5
display rmseo2m4

// MODEL 5. Baseline model estimated by OLS on prodf3 in levels:
// in-sample fit on the Model 1 estimation sample and out-of-sample RMSE on group 2.

// Estimate on the estimation sample (e2==1) and report information criteria.
regress prodf3 lprodf3l-lprodf3l13 t2-t27 y2-y27 nopapers if e2==1
estat ic

// In-sample R-squared.
scalar r2i1outb5 = e(r2)

// Out-of-sample step: linear prediction for group 2 where prodf3 is observed.
// The dependent variable is already in levels, so no retransformation is needed.
predict qhat5 if group==2 & prodf3<., xb

// Squared prediction error.
generate sqerr5 = (prodf3 - qhat5)^2 if qhat5<.

// pe5 holds the sum of squared errors; summarizing it gives the RSS as
// r(max) and the number of predicted observations as r(N).
quietly {
    egen pe5 = total(sqerr5) if qhat5<.
    summarize pe5
}
scalar rss5 = r(max)
scalar n5 = r(N)

// Out-of-sample root mean squared error.
scalar rmseo5outb = (rss5/n5)^0.5
display rmseo5outb

// Stop recording output to TableB.log.
log close



